In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import plotly.express as px #for visualization
import matplotlib.pyplot as plt #for visualization
In [2]:
df100=pd.read_csv("C:\\Users\\2001n\\OneDrive\\Documents\\CHURN_DATASET.csv")
df100
Out[2]:
| customerID | gender | SeniorCitizen | Partner | Dependents | tenure | PhoneService | MultipleLines | InternetService | OnlineSecurity | ... | DeviceProtection | TechSupport | StreamingTV | StreamingMovies | Contract | PaperlessBilling | PaymentMethod | MonthlyCharges | TotalCharges | Churn | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 7590-VHVEG | Female | 0 | Yes | No | 1 | No | No phone service | DSL | No | ... | No | No | No | No | Month-to-month | Yes | Electronic check | 29.85 | 29.85 | No |
| 1 | 5575-GNVDE | Male | 0 | No | No | 34 | Yes | No | DSL | Yes | ... | Yes | No | No | No | One year | No | Mailed check | 56.95 | 1889.5 | No |
| 2 | 3668-QPYBK | Male | 0 | No | No | 2 | Yes | No | DSL | Yes | ... | No | No | No | No | Month-to-month | Yes | Mailed check | 53.85 | 108.15 | Yes |
| 3 | 7795-CFOCW | Male | 0 | No | No | 45 | No | No phone service | DSL | Yes | ... | Yes | Yes | No | No | One year | No | Bank transfer (automatic) | 42.30 | 1840.75 | No |
| 4 | 9237-HQITU | Female | 0 | No | No | 2 | Yes | No | Fiber optic | No | ... | No | No | No | No | Month-to-month | Yes | Electronic check | 70.70 | 151.65 | Yes |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 7038 | 6840-RESVB | Male | 0 | Yes | Yes | 24 | Yes | Yes | DSL | Yes | ... | Yes | Yes | Yes | Yes | One year | Yes | Mailed check | 84.80 | 1990.5 | No |
| 7039 | 2234-XADUH | Female | 0 | Yes | Yes | 72 | Yes | Yes | Fiber optic | No | ... | Yes | No | Yes | Yes | One year | Yes | Credit card (automatic) | 103.20 | 7362.9 | No |
| 7040 | 4801-JZAZL | Female | 0 | Yes | Yes | 11 | No | No phone service | DSL | Yes | ... | No | No | No | No | Month-to-month | Yes | Electronic check | 29.60 | 346.45 | No |
| 7041 | 8361-LTMKD | Male | 1 | Yes | No | 4 | Yes | Yes | Fiber optic | No | ... | No | No | No | No | Month-to-month | Yes | Mailed check | 74.40 | 306.6 | Yes |
| 7042 | 3186-AJIEK | Male | 0 | No | No | 66 | Yes | No | Fiber optic | Yes | ... | Yes | Yes | Yes | Yes | Two year | Yes | Bank transfer (automatic) | 105.65 | 6844.5 | No |
7043 rows × 21 columns
In [3]:
df=df100.drop(["customerID"],axis = 1)
df
Out[3]:
| gender | SeniorCitizen | Partner | Dependents | tenure | PhoneService | MultipleLines | InternetService | OnlineSecurity | OnlineBackup | DeviceProtection | TechSupport | StreamingTV | StreamingMovies | Contract | PaperlessBilling | PaymentMethod | MonthlyCharges | TotalCharges | Churn | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Female | 0 | Yes | No | 1 | No | No phone service | DSL | No | Yes | No | No | No | No | Month-to-month | Yes | Electronic check | 29.85 | 29.85 | No |
| 1 | Male | 0 | No | No | 34 | Yes | No | DSL | Yes | No | Yes | No | No | No | One year | No | Mailed check | 56.95 | 1889.5 | No |
| 2 | Male | 0 | No | No | 2 | Yes | No | DSL | Yes | Yes | No | No | No | No | Month-to-month | Yes | Mailed check | 53.85 | 108.15 | Yes |
| 3 | Male | 0 | No | No | 45 | No | No phone service | DSL | Yes | No | Yes | Yes | No | No | One year | No | Bank transfer (automatic) | 42.30 | 1840.75 | No |
| 4 | Female | 0 | No | No | 2 | Yes | No | Fiber optic | No | No | No | No | No | No | Month-to-month | Yes | Electronic check | 70.70 | 151.65 | Yes |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 7038 | Male | 0 | Yes | Yes | 24 | Yes | Yes | DSL | Yes | No | Yes | Yes | Yes | Yes | One year | Yes | Mailed check | 84.80 | 1990.5 | No |
| 7039 | Female | 0 | Yes | Yes | 72 | Yes | Yes | Fiber optic | No | Yes | Yes | No | Yes | Yes | One year | Yes | Credit card (automatic) | 103.20 | 7362.9 | No |
| 7040 | Female | 0 | Yes | Yes | 11 | No | No phone service | DSL | Yes | No | No | No | No | No | Month-to-month | Yes | Electronic check | 29.60 | 346.45 | No |
| 7041 | Male | 1 | Yes | No | 4 | Yes | Yes | Fiber optic | No | No | No | No | No | No | Month-to-month | Yes | Mailed check | 74.40 | 306.6 | Yes |
| 7042 | Male | 0 | No | No | 66 | Yes | No | Fiber optic | Yes | No | Yes | Yes | Yes | Yes | Two year | Yes | Bank transfer (automatic) | 105.65 | 6844.5 | No |
7043 rows × 20 columns
In [4]:
df.shape
Out[4]:
(7043, 20)
In [5]:
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 7043 entries, 0 to 7042 Data columns (total 20 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 gender 7043 non-null object 1 SeniorCitizen 7043 non-null int64 2 Partner 7043 non-null object 3 Dependents 7043 non-null object 4 tenure 7043 non-null int64 5 PhoneService 7043 non-null object 6 MultipleLines 7043 non-null object 7 InternetService 7043 non-null object 8 OnlineSecurity 7043 non-null object 9 OnlineBackup 7043 non-null object 10 DeviceProtection 7043 non-null object 11 TechSupport 7043 non-null object 12 StreamingTV 7043 non-null object 13 StreamingMovies 7043 non-null object 14 Contract 7043 non-null object 15 PaperlessBilling 7043 non-null object 16 PaymentMethod 7043 non-null object 17 MonthlyCharges 7043 non-null float64 18 TotalCharges 7043 non-null object 19 Churn 7043 non-null object dtypes: float64(1), int64(2), object(17) memory usage: 1.1+ MB
In [6]:
def dataoveriew(df, message):
print(f'{message}:n') # it contains expressions in the braces
print('Number of rows: ' , df.shape[0])
print("Number of features:", df.shape[1])
print(df.columns.tolist())
print("Missing values:", df.isnull().sum().values.sum())
print("Unique values:")
print(df.nunique())
dataoveriew(df, 'Overview of the dataset')
Overview of the dataset:n Number of rows: 7043 Number of features: 20 ['gender', 'SeniorCitizen', 'Partner', 'Dependents', 'tenure', 'PhoneService', 'MultipleLines', 'InternetService', 'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies', 'Contract', 'PaperlessBilling', 'PaymentMethod', 'MonthlyCharges', 'TotalCharges', 'Churn'] Missing values: 0 Unique values: gender 2 SeniorCitizen 2 Partner 2 Dependents 2 tenure 73 PhoneService 2 MultipleLines 3 InternetService 3 OnlineSecurity 3 OnlineBackup 3 DeviceProtection 3 TechSupport 3 StreamingTV 3 StreamingMovies 3 Contract 3 PaperlessBilling 2 PaymentMethod 4 MonthlyCharges 1585 TotalCharges 6531 Churn 2 dtype: int64
In [7]:
missing = pd.DataFrame((df.isnull().sum())*100/df.shape[0]).reset_index()
plt.figure(figsize=(16,5))
sns.pointplot(x ='index',y = 0,data=missing)
plt.xticks(rotation =90,fontsize =7)
plt.title("Percentage of Missing values")
plt.ylabel("PERCENTAGE")
plt.show()
In [8]:
df.TotalCharges = pd.to_numeric(df.TotalCharges, errors='coerce')
df.isnull().sum()
Out[8]:
gender 0 SeniorCitizen 0 Partner 0 Dependents 0 tenure 0 PhoneService 0 MultipleLines 0 InternetService 0 OnlineSecurity 0 OnlineBackup 0 DeviceProtection 0 TechSupport 0 StreamingTV 0 StreamingMovies 0 Contract 0 PaperlessBilling 0 PaymentMethod 0 MonthlyCharges 0 TotalCharges 11 Churn 0 dtype: int64
In [9]:
df.loc[df['TotalCharges'].isnull() == True]
Out[9]:
| gender | SeniorCitizen | Partner | Dependents | tenure | PhoneService | MultipleLines | InternetService | OnlineSecurity | OnlineBackup | DeviceProtection | TechSupport | StreamingTV | StreamingMovies | Contract | PaperlessBilling | PaymentMethod | MonthlyCharges | TotalCharges | Churn | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 488 | Female | 0 | Yes | Yes | 0 | No | No phone service | DSL | Yes | No | Yes | Yes | Yes | No | Two year | Yes | Bank transfer (automatic) | 52.55 | NaN | No |
| 753 | Male | 0 | No | Yes | 0 | Yes | No | No | No internet service | No internet service | No internet service | No internet service | No internet service | No internet service | Two year | No | Mailed check | 20.25 | NaN | No |
| 936 | Female | 0 | Yes | Yes | 0 | Yes | No | DSL | Yes | Yes | Yes | No | Yes | Yes | Two year | No | Mailed check | 80.85 | NaN | No |
| 1082 | Male | 0 | Yes | Yes | 0 | Yes | Yes | No | No internet service | No internet service | No internet service | No internet service | No internet service | No internet service | Two year | No | Mailed check | 25.75 | NaN | No |
| 1340 | Female | 0 | Yes | Yes | 0 | No | No phone service | DSL | Yes | Yes | Yes | Yes | Yes | No | Two year | No | Credit card (automatic) | 56.05 | NaN | No |
| 3331 | Male | 0 | Yes | Yes | 0 | Yes | No | No | No internet service | No internet service | No internet service | No internet service | No internet service | No internet service | Two year | No | Mailed check | 19.85 | NaN | No |
| 3826 | Male | 0 | Yes | Yes | 0 | Yes | Yes | No | No internet service | No internet service | No internet service | No internet service | No internet service | No internet service | Two year | No | Mailed check | 25.35 | NaN | No |
| 4380 | Female | 0 | Yes | Yes | 0 | Yes | No | No | No internet service | No internet service | No internet service | No internet service | No internet service | No internet service | Two year | No | Mailed check | 20.00 | NaN | No |
| 5218 | Male | 0 | Yes | Yes | 0 | Yes | No | No | No internet service | No internet service | No internet service | No internet service | No internet service | No internet service | One year | Yes | Mailed check | 19.70 | NaN | No |
| 6670 | Female | 0 | Yes | Yes | 0 | Yes | Yes | DSL | No | Yes | Yes | Yes | Yes | No | Two year | No | Mailed check | 73.35 | NaN | No |
| 6754 | Male | 0 | No | Yes | 0 | Yes | Yes | DSL | Yes | Yes | No | Yes | No | No | Two year | Yes | Bank transfer (automatic) | 61.90 | NaN | No |
In [10]:
df.dropna(how = 'any', inplace = True)
In [11]:
df.info()
<class 'pandas.core.frame.DataFrame'> Index: 7032 entries, 0 to 7042 Data columns (total 20 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 gender 7032 non-null object 1 SeniorCitizen 7032 non-null int64 2 Partner 7032 non-null object 3 Dependents 7032 non-null object 4 tenure 7032 non-null int64 5 PhoneService 7032 non-null object 6 MultipleLines 7032 non-null object 7 InternetService 7032 non-null object 8 OnlineSecurity 7032 non-null object 9 OnlineBackup 7032 non-null object 10 DeviceProtection 7032 non-null object 11 TechSupport 7032 non-null object 12 StreamingTV 7032 non-null object 13 StreamingMovies 7032 non-null object 14 Contract 7032 non-null object 15 PaperlessBilling 7032 non-null object 16 PaymentMethod 7032 non-null object 17 MonthlyCharges 7032 non-null float64 18 TotalCharges 7032 non-null float64 19 Churn 7032 non-null object dtypes: float64(2), int64(2), object(16) memory usage: 1.1+ MB
In [12]:
df.shape
Out[12]:
(7032, 20)
In [13]:
df1 = df["Churn"].value_counts().reset_index() # Reset index to create columns, Defult index
df1.columns = ['Category', 'count'] # Rename columns explicitly
df1
Out[13]:
| Category | count | |
|---|---|---|
| 0 | No | 5163 |
| 1 | Yes | 1869 |
In [14]:
df1 = df["Churn"].value_counts().reset_index() # Reset index to create columns, Defult index
df1.columns = ['Category', 'count'] # Rename columns explicitly
fig = px.pie(df1, values='count', names='Category',color_discrete_sequence=["green", "red"],
title='Distribution of Churn')
fig.show()
In [15]:
df2 = df.copy()
In [16]:
labels = ["{0} - {1}".format(i, i + 11) for i in range(1, 72, 12)]
df2['tenure_group'] = pd.cut(df.tenure, range(1, 80, 12), right=False, labels=labels)
In [17]:
df2.tenure_group.value_counts()
Out[17]:
tenure_group 1 - 12 2175 61 - 72 1407 13 - 24 1024 25 - 36 832 49 - 60 832 37 - 48 762 Name: count, dtype: int64
In [18]:
df2
Out[18]:
| gender | SeniorCitizen | Partner | Dependents | tenure | PhoneService | MultipleLines | InternetService | OnlineSecurity | OnlineBackup | ... | TechSupport | StreamingTV | StreamingMovies | Contract | PaperlessBilling | PaymentMethod | MonthlyCharges | TotalCharges | Churn | tenure_group | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Female | 0 | Yes | No | 1 | No | No phone service | DSL | No | Yes | ... | No | No | No | Month-to-month | Yes | Electronic check | 29.85 | 29.85 | No | 1 - 12 |
| 1 | Male | 0 | No | No | 34 | Yes | No | DSL | Yes | No | ... | No | No | No | One year | No | Mailed check | 56.95 | 1889.50 | No | 25 - 36 |
| 2 | Male | 0 | No | No | 2 | Yes | No | DSL | Yes | Yes | ... | No | No | No | Month-to-month | Yes | Mailed check | 53.85 | 108.15 | Yes | 1 - 12 |
| 3 | Male | 0 | No | No | 45 | No | No phone service | DSL | Yes | No | ... | Yes | No | No | One year | No | Bank transfer (automatic) | 42.30 | 1840.75 | No | 37 - 48 |
| 4 | Female | 0 | No | No | 2 | Yes | No | Fiber optic | No | No | ... | No | No | No | Month-to-month | Yes | Electronic check | 70.70 | 151.65 | Yes | 1 - 12 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 7038 | Male | 0 | Yes | Yes | 24 | Yes | Yes | DSL | Yes | No | ... | Yes | Yes | Yes | One year | Yes | Mailed check | 84.80 | 1990.50 | No | 13 - 24 |
| 7039 | Female | 0 | Yes | Yes | 72 | Yes | Yes | Fiber optic | No | Yes | ... | No | Yes | Yes | One year | Yes | Credit card (automatic) | 103.20 | 7362.90 | No | 61 - 72 |
| 7040 | Female | 0 | Yes | Yes | 11 | No | No phone service | DSL | Yes | No | ... | No | No | No | Month-to-month | Yes | Electronic check | 29.60 | 346.45 | No | 1 - 12 |
| 7041 | Male | 1 | Yes | No | 4 | Yes | Yes | Fiber optic | No | No | ... | No | No | No | Month-to-month | Yes | Mailed check | 74.40 | 306.60 | Yes | 1 - 12 |
| 7042 | Male | 0 | No | No | 66 | Yes | No | Fiber optic | Yes | No | ... | Yes | Yes | Yes | Two year | Yes | Bank transfer (automatic) | 105.65 | 6844.50 | No | 61 - 72 |
7032 rows × 21 columns
In [19]:
df2.shape
Out[19]:
(7032, 21)
In [ ]:
In [20]:
import warnings
warnings.filterwarnings('ignore')
In [21]:
df2.drop(columns= ['tenure'], axis=1, inplace=True)
df2
Out[21]:
| gender | SeniorCitizen | Partner | Dependents | PhoneService | MultipleLines | InternetService | OnlineSecurity | OnlineBackup | DeviceProtection | TechSupport | StreamingTV | StreamingMovies | Contract | PaperlessBilling | PaymentMethod | MonthlyCharges | TotalCharges | Churn | tenure_group | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Female | 0 | Yes | No | No | No phone service | DSL | No | Yes | No | No | No | No | Month-to-month | Yes | Electronic check | 29.85 | 29.85 | No | 1 - 12 |
| 1 | Male | 0 | No | No | Yes | No | DSL | Yes | No | Yes | No | No | No | One year | No | Mailed check | 56.95 | 1889.50 | No | 25 - 36 |
| 2 | Male | 0 | No | No | Yes | No | DSL | Yes | Yes | No | No | No | No | Month-to-month | Yes | Mailed check | 53.85 | 108.15 | Yes | 1 - 12 |
| 3 | Male | 0 | No | No | No | No phone service | DSL | Yes | No | Yes | Yes | No | No | One year | No | Bank transfer (automatic) | 42.30 | 1840.75 | No | 37 - 48 |
| 4 | Female | 0 | No | No | Yes | No | Fiber optic | No | No | No | No | No | No | Month-to-month | Yes | Electronic check | 70.70 | 151.65 | Yes | 1 - 12 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 7038 | Male | 0 | Yes | Yes | Yes | Yes | DSL | Yes | No | Yes | Yes | Yes | Yes | One year | Yes | Mailed check | 84.80 | 1990.50 | No | 13 - 24 |
| 7039 | Female | 0 | Yes | Yes | Yes | Yes | Fiber optic | No | Yes | Yes | No | Yes | Yes | One year | Yes | Credit card (automatic) | 103.20 | 7362.90 | No | 61 - 72 |
| 7040 | Female | 0 | Yes | Yes | No | No phone service | DSL | Yes | No | No | No | No | No | Month-to-month | Yes | Electronic check | 29.60 | 346.45 | No | 1 - 12 |
| 7041 | Male | 1 | Yes | No | Yes | Yes | Fiber optic | No | No | No | No | No | No | Month-to-month | Yes | Mailed check | 74.40 | 306.60 | Yes | 1 - 12 |
| 7042 | Male | 0 | No | No | Yes | No | Fiber optic | Yes | No | Yes | Yes | Yes | Yes | Two year | Yes | Bank transfer (automatic) | 105.65 | 6844.50 | No | 61 - 72 |
7032 rows × 20 columns
In [22]:
for i, predictor in enumerate(df2.drop(columns=['Churn', 'TotalCharges', 'MonthlyCharges'])):
plt.figure(i)
plt.xticks(rotation = 90)
sns.countplot(data=df2, x=predictor, hue='Churn')
In [ ]:
In [ ]:
In [23]:
def hist(feature):
df2 = df.groupby([feature, 'Churn']).size().reset_index()
df2 = df2.rename(columns={0: 'Count'}) # Corrected line
fig = px.histogram(df2, x=feature, y='Count', color='Churn', marginal='box'
, title=('Churn rate frequency to distribution')
, color_discrete_sequence=["green", "red"])
fig.show()
In [24]:
hist('tenure')
hist('MonthlyCharges')
hist('TotalCharges')
In [136]:
bin_df = pd.DataFrame()
#Update the binning dataframe
bin_df['tenure_bins'] = pd.qcut(df['tenure'], q=3, labels= ['low', 'medium', 'high'])
bin_df['MonthlyCharges_bins'] = pd.qcut(df['MonthlyCharges'], q=3, labels= ['low', 'medium', 'high'])
bin_df['TotalCharges_bins'] = pd.qcut(df['TotalCharges'], q=3, labels= ['low', 'medium', 'high'])
bin_df['Churn'] = df['Churn']
In [138]:
def plot_bar(feature, data):
fig = px.bar(data.groupby([feature, 'Churn']).size().reset_index(),
x=feature, y=0, color='Churn',
title=f'Churn rate frequency to {feature} bins',
barmode='group',
color_discrete_sequence=["green", "red"])
fig.show()
plot_bar('tenure_bins', bin_df)
plot_bar('MonthlyCharges_bins', bin_df)
plot_bar('TotalCharges_bins', bin_df)
DATA PREPROCESSING
In [27]:
from ydata_profiling import ProfileReport
profile = ProfileReport(df, title="Profiling Report")
In [28]:
profile
Summarize dataset: 0%| | 0/5 [00:00<?, ?it/s]
Generate report structure: 0%| | 0/1 [00:00<?, ?it/s]
Render HTML: 0%| | 0/1 [00:00<?, ?it/s]
Out[28]:
In [29]:
from sklearn import preprocessing
In [30]:
df3 = pd.get_dummies(df2, columns=['gender','Partner','Dependents','tenure_group','PhoneService','MultipleLines','InternetService','OnlineSecurity','OnlineBackup','DeviceProtection','TechSupport','StreamingTV','StreamingMovies','Contract','PaperlessBilling','PaymentMethod','Churn'])
In [31]:
df3
Out[31]:
| SeniorCitizen | MonthlyCharges | TotalCharges | gender_Female | gender_Male | Partner_No | Partner_Yes | Dependents_No | Dependents_Yes | tenure_group_1 - 12 | ... | Contract_One year | Contract_Two year | PaperlessBilling_No | PaperlessBilling_Yes | PaymentMethod_Bank transfer (automatic) | PaymentMethod_Credit card (automatic) | PaymentMethod_Electronic check | PaymentMethod_Mailed check | Churn_No | Churn_Yes | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 29.85 | 29.85 | True | False | False | True | True | False | True | ... | False | False | False | True | False | False | True | False | True | False |
| 1 | 0 | 56.95 | 1889.50 | False | True | True | False | True | False | False | ... | True | False | True | False | False | False | False | True | True | False |
| 2 | 0 | 53.85 | 108.15 | False | True | True | False | True | False | True | ... | False | False | False | True | False | False | False | True | False | True |
| 3 | 0 | 42.30 | 1840.75 | False | True | True | False | True | False | False | ... | True | False | True | False | True | False | False | False | True | False |
| 4 | 0 | 70.70 | 151.65 | True | False | True | False | True | False | True | ... | False | False | False | True | False | False | True | False | False | True |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 7038 | 0 | 84.80 | 1990.50 | False | True | False | True | False | True | False | ... | True | False | False | True | False | False | False | True | True | False |
| 7039 | 0 | 103.20 | 7362.90 | True | False | False | True | False | True | False | ... | True | False | False | True | False | True | False | False | True | False |
| 7040 | 0 | 29.60 | 346.45 | True | False | False | True | False | True | True | ... | False | False | False | True | False | False | True | False | True | False |
| 7041 | 1 | 74.40 | 306.60 | False | True | False | True | True | False | True | ... | False | False | False | True | False | False | False | True | False | True |
| 7042 | 0 | 105.65 | 6844.50 | False | True | True | False | True | False | False | ... | False | True | False | True | True | False | False | False | True | False |
7032 rows × 52 columns
In [32]:
df4 = df3.replace({True :1,False: 0})
In [33]:
df4
Out[33]:
| SeniorCitizen | MonthlyCharges | TotalCharges | gender_Female | gender_Male | Partner_No | Partner_Yes | Dependents_No | Dependents_Yes | tenure_group_1 - 12 | ... | Contract_One year | Contract_Two year | PaperlessBilling_No | PaperlessBilling_Yes | PaymentMethod_Bank transfer (automatic) | PaymentMethod_Credit card (automatic) | PaymentMethod_Electronic check | PaymentMethod_Mailed check | Churn_No | Churn_Yes | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 29.85 | 29.85 | 1 | 0 | 0 | 1 | 1 | 0 | 1 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 1 | 0 |
| 1 | 0 | 56.95 | 1889.50 | 0 | 1 | 1 | 0 | 1 | 0 | 0 | ... | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 |
| 2 | 0 | 53.85 | 108.15 | 0 | 1 | 1 | 0 | 1 | 0 | 1 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 1 |
| 3 | 0 | 42.30 | 1840.75 | 0 | 1 | 1 | 0 | 1 | 0 | 0 | ... | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 0 |
| 4 | 0 | 70.70 | 151.65 | 1 | 0 | 1 | 0 | 1 | 0 | 1 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 7038 | 0 | 84.80 | 1990.50 | 0 | 1 | 0 | 1 | 0 | 1 | 0 | ... | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 1 | 0 |
| 7039 | 0 | 103.20 | 7362.90 | 1 | 0 | 0 | 1 | 0 | 1 | 0 | ... | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 1 | 0 |
| 7040 | 0 | 29.60 | 346.45 | 1 | 0 | 0 | 1 | 0 | 1 | 1 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 1 | 0 |
| 7041 | 1 | 74.40 | 306.60 | 0 | 1 | 0 | 1 | 1 | 0 | 1 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 1 |
| 7042 | 0 | 105.65 | 6844.50 | 0 | 1 | 1 | 0 | 1 | 0 | 0 | ... | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 0 |
7032 rows × 52 columns
In [34]:
# Step 1: Exclude non-numeric columns
numeric_df = df4.select_dtypes(include=['number'])
# Step 2: Check for non-numeric columns with string values
non_numeric_cols = df4.columns.difference(numeric_df.columns)
# If there are non-numeric columns with string values, you can drop them or convert them to numeric if possible.
# For example, to drop them:
df4 = df4.drop(columns=non_numeric_cols)
# Now, you can calculate the correlation matrix
c_matrix = df4.corr()
In [35]:
c_matrix
Out[35]:
| SeniorCitizen | MonthlyCharges | TotalCharges | gender_Female | gender_Male | Partner_No | Partner_Yes | Dependents_No | Dependents_Yes | tenure_group_1 - 12 | ... | Contract_One year | Contract_Two year | PaperlessBilling_No | PaperlessBilling_Yes | PaymentMethod_Bank transfer (automatic) | PaymentMethod_Credit card (automatic) | PaymentMethod_Electronic check | PaymentMethod_Mailed check | Churn_No | Churn_Yes | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| SeniorCitizen | 1.000000 | 0.219874 | 0.102411 | 0.001819 | -0.001819 | -0.016957 | 0.016957 | 0.210550 | -0.210550 | -0.027713 | ... | -0.046491 | -0.116205 | -0.156258 | 0.156258 | -0.016235 | -0.024359 | 0.171322 | -0.152987 | -0.150541 | 0.150541 |
| MonthlyCharges | 0.219874 | 1.000000 | 0.651065 | 0.013779 | -0.013779 | -0.097825 | 0.097825 | 0.112343 | -0.112343 | -0.191881 | ... | 0.004810 | -0.073256 | -0.351930 | 0.351930 | 0.042410 | 0.030055 | 0.271117 | -0.376568 | -0.192858 | 0.192858 |
| TotalCharges | 0.102411 | 0.651065 | 1.000000 | -0.000048 | 0.000048 | -0.319072 | 0.319072 | -0.064653 | 0.064653 | -0.592443 | ... | 0.170569 | 0.358036 | -0.157830 | 0.157830 | 0.186119 | 0.182663 | -0.060436 | -0.294708 | 0.199484 | -0.199484 |
| gender_Female | 0.001819 | 0.013779 | -0.000048 | 1.000000 | -1.000000 | -0.001379 | 0.001379 | 0.010349 | -0.010349 | 0.001050 | ... | -0.007755 | 0.003603 | -0.011902 | 0.011902 | 0.015973 | -0.001632 | -0.000844 | -0.013199 | -0.008545 | 0.008545 |
| gender_Male | -0.001819 | -0.013779 | 0.000048 | -1.000000 | 1.000000 | 0.001379 | -0.001379 | -0.010349 | 0.010349 | -0.001050 | ... | 0.007755 | -0.003603 | 0.011902 | -0.011902 | -0.015973 | 0.001632 | 0.000844 | 0.013199 | 0.008545 | -0.008545 |
| Partner_No | -0.016957 | -0.097825 | -0.319072 | -0.001379 | 0.001379 | 1.000000 | -1.000000 | 0.452269 | -0.452269 | 0.305061 | ... | -0.083067 | -0.247334 | -0.013957 | 0.013957 | -0.111406 | -0.082327 | 0.083207 | 0.096948 | -0.149982 | 0.149982 |
| Partner_Yes | 0.016957 | 0.097825 | 0.319072 | 0.001379 | -0.001379 | -1.000000 | 1.000000 | -0.452269 | 0.452269 | -0.305061 | ... | 0.083067 | 0.247334 | 0.013957 | -0.013957 | 0.111406 | 0.082327 | -0.083207 | -0.096948 | 0.149982 | -0.149982 |
| Dependents_No | 0.210550 | 0.112343 | -0.064653 | 0.010349 | -0.010349 | 0.452269 | -0.452269 | 1.000000 | -1.000000 | 0.145379 | ... | -0.069222 | -0.201699 | -0.110131 | 0.110131 | -0.052369 | -0.061134 | 0.149274 | -0.056448 | -0.163128 | 0.163128 |
| Dependents_Yes | -0.210550 | -0.112343 | 0.064653 | -0.010349 | 0.010349 | -0.452269 | 0.452269 | -1.000000 | 1.000000 | -0.145379 | ... | 0.069222 | 0.201699 | 0.110131 | -0.110131 | 0.052369 | 0.061134 | -0.149274 | 0.056448 | 0.163128 | -0.163128 |
| tenure_group_1 - 12 | -0.027713 | -0.191881 | -0.592443 | 0.001050 | -0.001050 | 0.305061 | -0.305061 | 0.145379 | -0.145379 | 1.000000 | ... | -0.251299 | -0.333850 | 0.003860 | -0.003860 | -0.185855 | -0.184165 | 0.160530 | 0.183222 | -0.319628 | 0.319628 |
| tenure_group_13 - 24 | 0.001860 | -0.047220 | -0.210745 | 0.000649 | -0.000649 | 0.048481 | -0.048481 | 0.001459 | -0.001459 | -0.276268 | ... | -0.017196 | -0.146749 | -0.003328 | 0.003328 | -0.046329 | -0.039647 | 0.030387 | 0.050371 | -0.019929 | 0.019929 |
| tenure_group_25 - 36 | 0.027317 | 0.009465 | -0.047370 | -0.006249 | 0.006249 | -0.003131 | 0.003131 | -0.009289 | 0.009289 | -0.245138 | ... | 0.082077 | -0.106618 | 0.005504 | -0.005504 | -0.000472 | 0.008599 | 0.003897 | -0.012360 | 0.040997 | -0.040997 |
| tenure_group_37 - 48 | -0.000929 | 0.017614 | 0.083696 | 0.020658 | -0.020658 | -0.035092 | 0.035092 | -0.023544 | 0.023544 | -0.233286 | ... | 0.122003 | -0.004919 | 0.001538 | -0.001538 | 0.029750 | 0.029093 | -0.019634 | -0.035775 | 0.059579 | -0.059579 |
| tenure_group_49 - 60 | 0.014186 | 0.070048 | 0.252905 | 0.004319 | -0.004319 | -0.105341 | 0.105341 | -0.031419 | 0.031419 | -0.245138 | ... | 0.158917 | 0.080082 | -0.010626 | 0.010626 | 0.060183 | 0.048167 | -0.030584 | -0.072170 | 0.100800 | -0.100800 |
| tenure_group_61 - 72 | -0.002407 | 0.185440 | 0.639312 | -0.016279 | 0.016279 | -0.280353 | 0.280353 | -0.118090 | 0.118090 | -0.334681 | ... | 0.016142 | 0.540336 | 0.001415 | -0.001415 | 0.184249 | 0.179294 | -0.175456 | -0.160053 | 0.226078 | -0.226078 |
| PhoneService_No | -0.008392 | -0.248033 | -0.113008 | -0.007515 | 0.007515 | 0.018397 | -0.018397 | -0.001078 | 0.001078 | 0.006950 | ... | 0.003142 | -0.004442 | 0.016696 | -0.016696 | -0.008271 | 0.006916 | -0.002747 | 0.004463 | 0.011691 | -0.011691 |
| PhoneService_Yes | 0.008392 | 0.248033 | 0.113008 | 0.007515 | -0.007515 | -0.018397 | 0.018397 | 0.001078 | -0.001078 | -0.006950 | ... | -0.003142 | 0.004442 | -0.016696 | 0.016696 | 0.008271 | -0.006916 | 0.002747 | -0.004463 | -0.011691 | 0.011691 |
| MultipleLines_No | -0.136377 | -0.338514 | -0.396765 | -0.004335 | 0.004335 | 0.130028 | -0.130028 | -0.023388 | 0.023388 | 0.256171 | ... | 0.001694 | -0.102756 | 0.151974 | -0.151974 | -0.069663 | -0.063712 | -0.080990 | 0.222395 | 0.032654 | -0.032654 |
| MultipleLines_No phone service | -0.008392 | -0.248033 | -0.113008 | -0.007515 | 0.007515 | 0.018397 | -0.018397 | -0.001078 | 0.001078 | 0.006950 | ... | 0.003142 | -0.004442 | 0.016696 | -0.016696 | -0.008271 | 0.006916 | -0.002747 | 0.004463 | 0.011691 | -0.011691 |
| MultipleLines_Yes | 0.142996 | 0.490912 | 0.469042 | 0.008883 | -0.008883 | -0.142561 | 0.142561 | 0.024307 | -0.024307 | -0.263331 | ... | -0.003594 | 0.106618 | -0.163746 | 0.163746 | 0.075429 | 0.060319 | 0.083583 | -0.227672 | -0.040033 | 0.040033 |
| InternetService_DSL | -0.108276 | -0.161368 | -0.052190 | -0.007584 | 0.007584 | 0.001043 | -0.001043 | -0.051593 | 0.051593 | -0.001470 | ... | 0.047300 | 0.030924 | 0.063390 | -0.063390 | 0.024760 | 0.051222 | -0.104293 | 0.042754 | 0.124141 | -0.124141 |
| InternetService_Fiber optic | 0.254923 | 0.787195 | 0.360769 | 0.011189 | -0.011189 | -0.001235 | 0.001235 | 0.164101 | -0.164101 | -0.021441 | ... | -0.076809 | -0.209965 | -0.326470 | 0.326470 | -0.022779 | -0.050552 | 0.335763 | -0.305984 | -0.307463 | 0.307463 |
| InternetService_No | -0.182519 | -0.763191 | -0.374878 | -0.004745 | 0.004745 | 0.000286 | -0.000286 | -0.138383 | 0.138383 | 0.027554 | ... | 0.038061 | 0.217542 | 0.320592 | -0.320592 | -0.001094 | 0.001870 | -0.284608 | 0.319694 | 0.227578 | -0.227578 |
| OnlineSecurity_No | 0.185145 | 0.360220 | -0.064515 | -0.010859 | 0.010859 | 0.129394 | -0.129394 | 0.186979 | -0.186979 | 0.196529 | ... | -0.122360 | -0.352447 | -0.267592 | 0.267592 | -0.084436 | -0.105963 | 0.335854 | -0.190919 | -0.342235 | 0.342235 |
| OnlineSecurity_No internet service | -0.182519 | -0.763191 | -0.374878 | -0.004745 | 0.004745 | 0.000286 | -0.000286 | -0.138383 | 0.138383 | 0.027554 | ... | 0.038061 | 0.217542 | 0.320592 | -0.320592 | -0.001094 | 0.001870 | -0.284608 | 0.319694 | 0.227578 | -0.227578 |
| OnlineSecurity_Yes | -0.038576 | 0.296447 | 0.412619 | 0.016328 | -0.016328 | -0.143346 | 0.143346 | -0.080786 | 0.080786 | -0.242409 | ... | 0.100658 | 0.191698 | 0.004051 | -0.004051 | 0.094366 | 0.115473 | -0.112295 | -0.079918 | 0.171270 | -0.171270 |
| OnlineBackup_No | 0.087539 | 0.210126 | -0.177633 | -0.008605 | 0.008605 | 0.135626 | -0.135626 | 0.137421 | -0.137421 | 0.233228 | ... | -0.112133 | -0.287128 | -0.144218 | 0.144218 | -0.082365 | -0.088189 | 0.236414 | -0.098438 | -0.267595 | 0.267595 |
| OnlineBackup_No internet service | -0.182519 | -0.763191 | -0.374878 | -0.004745 | 0.004745 | 0.000286 | -0.000286 | -0.138383 | 0.138383 | 0.027554 | ... | 0.038061 | 0.217542 | 0.320592 | -0.320592 | -0.001094 | 0.001870 | -0.284608 | 0.319694 | 0.227578 | -0.227578 |
| OnlineBackup_Yes | 0.066663 | 0.441529 | 0.510100 | 0.013093 | -0.013093 | -0.141849 | 0.141849 | -0.023639 | 0.023639 | -0.267366 | ... | 0.084113 | 0.111391 | -0.127056 | 0.127056 | 0.086942 | 0.090455 | -0.000364 | -0.174075 | 0.082307 | -0.082307 |
| DeviceProtection_No | 0.094403 | 0.171057 | -0.189485 | 0.003163 | -0.003163 | 0.146702 | -0.146702 | 0.128053 | -0.128053 | 0.239267 | ... | -0.130038 | -0.338520 | -0.166253 | 0.166253 | -0.078561 | -0.108008 | 0.239173 | -0.085850 | -0.252056 | 0.252056 |
| DeviceProtection_No internet service | -0.182519 | -0.763191 | -0.374878 | -0.004745 | 0.004745 | 0.000286 | -0.000286 | -0.138383 | 0.138383 | 0.027554 | ... | 0.038061 | 0.217542 | 0.320592 | -0.320592 | -0.001094 | 0.001870 | -0.284608 | 0.319694 | 0.227578 | -0.227578 |
| DeviceProtection_Yes | 0.059514 | 0.482607 | 0.522881 | 0.000807 | -0.000807 | -0.153556 | 0.153556 | -0.013900 | 0.013900 | -0.273920 | ... | 0.102911 | 0.165248 | -0.104079 | 0.104079 | 0.083047 | 0.111252 | -0.003308 | -0.187325 | 0.066193 | -0.066193 |
| TechSupport_No | 0.205254 | 0.321267 | -0.084270 | -0.003815 | 0.003815 | 0.108875 | -0.108875 | 0.171164 | -0.171164 | 0.193915 | ... | -0.118709 | -0.397788 | -0.229875 | 0.229875 | -0.090296 | -0.107761 | 0.338529 | -0.186388 | -0.336877 | 0.336877 |
| TechSupport_No internet service | -0.182519 | -0.763191 | -0.374878 | -0.004745 | 0.004745 | 0.000286 | -0.000286 | -0.138383 | 0.138383 | 0.027554 | ... | 0.038061 | 0.217542 | 0.320592 | -0.320592 | -0.001094 | 0.001870 | -0.284608 | 0.319694 | 0.227578 | -0.227578 |
| TechSupport_Yes | -0.060577 | 0.338301 | 0.432868 | 0.008507 | -0.008507 | -0.120206 | 0.120206 | -0.063053 | 0.063053 | -0.238628 | ... | 0.096258 | 0.240924 | -0.037536 | 0.037536 | 0.100472 | 0.117024 | -0.114807 | -0.084631 | 0.164716 | -0.164716 |
| StreamingTV_No | 0.048664 | 0.016015 | -0.197144 | -0.003088 | 0.003088 | 0.123394 | -0.123394 | 0.099912 | -0.099912 | 0.196100 | ... | -0.093495 | -0.254456 | -0.046715 | 0.046715 | -0.044887 | -0.041309 | 0.095426 | -0.022650 | -0.128435 | 0.128435 |
| StreamingTV_No internet service | -0.182519 | -0.763191 | -0.374878 | -0.004745 | 0.004745 | 0.000286 | -0.000286 | -0.138383 | 0.138383 | 0.027554 | ... | 0.038061 | 0.217542 | 0.320592 | -0.320592 | -0.001094 | 0.001870 | -0.284608 | 0.319694 | 0.227578 | -0.227578 |
| StreamingTV_Yes | 0.105445 | 0.629668 | 0.515709 | 0.007124 | -0.007124 | -0.124483 | 0.124483 | 0.016499 | -0.016499 | -0.220761 | ... | 0.061930 | 0.072124 | -0.224241 | 0.224241 | 0.046121 | 0.040010 | 0.144747 | -0.247712 | -0.063254 | 0.063254 |
| StreamingMovies_No | 0.034196 | 0.017271 | -0.202605 | -0.006078 | 0.006078 | 0.117488 | -0.117488 | 0.078245 | -0.078245 | 0.197479 | ... | -0.096613 | -0.258495 | -0.058987 | 0.058987 | -0.047677 | -0.049817 | 0.102617 | -0.019648 | -0.130920 | 0.130920 |
| StreamingMovies_No internet service | -0.182519 | -0.763191 | -0.374878 | -0.004745 | 0.004745 | 0.000286 | -0.000286 | -0.138383 | 0.138383 | 0.027554 | ... | 0.038061 | 0.217542 | 0.320592 | -0.320592 | -0.001094 | 0.001870 | -0.284608 | 0.319694 | 0.227578 | -0.227578 |
| StreamingMovies_Yes | 0.119842 | 0.627235 | 0.519867 | 0.010105 | -0.010105 | -0.118108 | 0.118108 | 0.038375 | -0.038375 | -0.221388 | ... | 0.064780 | 0.075603 | -0.211583 | 0.211583 | 0.048755 | 0.048398 | 0.137420 | -0.250290 | -0.060860 | 0.060860 |
| Contract_Month-to-month | 0.137752 | 0.058933 | -0.446776 | 0.003251 | -0.003251 | 0.280202 | -0.280202 | 0.229715 | -0.229715 | 0.492052 | ... | -0.570053 | -0.621933 | -0.168296 | 0.168296 | -0.180159 | -0.204960 | 0.330879 | 0.006209 | -0.404565 | 0.404565 |
| Contract_One year | -0.046491 | 0.004810 | 0.170569 | -0.007755 | 0.007755 | -0.083067 | 0.083067 | -0.069222 | 0.069222 | -0.251299 | ... | 1.000000 | -0.288843 | 0.052278 | -0.052278 | 0.057629 | 0.067590 | -0.109546 | 0.000197 | 0.178225 | -0.178225 |
| Contract_Two year | -0.116205 | -0.073256 | 0.358036 | 0.003603 | -0.003603 | -0.247334 | 0.247334 | -0.201699 | 0.201699 | -0.333850 | ... | -0.288843 | 1.000000 | 0.146281 | -0.146281 | 0.155004 | 0.174410 | -0.281147 | -0.007423 | 0.301552 | -0.301552 |
| PaperlessBilling_No | -0.156258 | -0.351930 | -0.157830 | -0.011902 | 0.011902 | -0.013957 | 0.013957 | -0.110131 | 0.110131 | 0.003860 | ... | 0.052278 | 0.146281 | 1.000000 | -1.000000 | 0.017469 | 0.013726 | -0.208427 | 0.203981 | 0.191454 | -0.191454 |
| PaperlessBilling_Yes | 0.156258 | 0.351930 | 0.157830 | 0.011902 | -0.011902 | 0.013957 | -0.013957 | 0.110131 | -0.110131 | -0.003860 | ... | -0.052278 | -0.146281 | -1.000000 | 1.000000 | -0.017469 | -0.013726 | 0.208427 | -0.203981 | -0.191454 | 0.191454 |
| PaymentMethod_Bank transfer (automatic) | -0.016235 | 0.042410 | 0.186119 | 0.015973 | -0.015973 | -0.111406 | 0.111406 | -0.052369 | 0.052369 | -0.185855 | ... | 0.057629 | 0.155004 | 0.017469 | -0.017469 | 1.000000 | -0.278423 | -0.377270 | -0.288097 | 0.118136 | -0.118136 |
| PaymentMethod_Credit card (automatic) | -0.024359 | 0.030055 | 0.182663 | -0.001632 | 0.001632 | -0.082327 | 0.082327 | -0.061134 | 0.061134 | -0.184165 | ... | 0.067590 | 0.174410 | 0.013726 | -0.013726 | -0.278423 | 1.000000 | -0.373978 | -0.285583 | 0.134687 | -0.134687 |
| PaymentMethod_Electronic check | 0.171322 | 0.271117 | -0.060436 | -0.000844 | 0.000844 | 0.083207 | -0.083207 | 0.149274 | -0.149274 | 0.160530 | ... | -0.109546 | -0.281147 | -0.208427 | 0.208427 | -0.377270 | -0.373978 | 1.000000 | -0.386971 | -0.301455 | 0.301455 |
| PaymentMethod_Mailed check | -0.152987 | -0.376568 | -0.294708 | -0.013199 | 0.013199 | 0.096948 | -0.096948 | -0.056448 | 0.056448 | 0.183222 | ... | 0.000197 | -0.007423 | 0.203981 | -0.203981 | -0.288097 | -0.285583 | -0.386971 | 1.000000 | 0.090773 | -0.090773 |
| Churn_No | -0.150541 | -0.192858 | 0.199484 | -0.008545 | 0.008545 | -0.149982 | 0.149982 | -0.163128 | 0.163128 | -0.319628 | ... | 0.178225 | 0.301552 | 0.191454 | -0.191454 | 0.118136 | 0.134687 | -0.301455 | 0.090773 | 1.000000 | -1.000000 |
| Churn_Yes | 0.150541 | 0.192858 | -0.199484 | 0.008545 | -0.008545 | 0.149982 | -0.149982 | 0.163128 | -0.163128 | 0.319628 | ... | -0.178225 | -0.301552 | -0.191454 | 0.191454 | -0.118136 | -0.134687 | 0.301455 | -0.090773 | -1.000000 | 1.000000 |
52 rows × 52 columns
In [36]:
plt.figure(figsize = (12,12))
sns.heatmap(data = c_matrix, cmap = "Paired")
Out[36]:
<Axes: >
SPLITTING THE DATASET
In [37]:
x = df4.drop(['Churn_Yes','Churn_No'],axis =1)
y = df4['Churn_Yes']
In [38]:
x
Out[38]:
| SeniorCitizen | MonthlyCharges | TotalCharges | gender_Female | gender_Male | Partner_No | Partner_Yes | Dependents_No | Dependents_Yes | tenure_group_1 - 12 | ... | StreamingMovies_Yes | Contract_Month-to-month | Contract_One year | Contract_Two year | PaperlessBilling_No | PaperlessBilling_Yes | PaymentMethod_Bank transfer (automatic) | PaymentMethod_Credit card (automatic) | PaymentMethod_Electronic check | PaymentMethod_Mailed check | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 29.85 | 29.85 | 1 | 0 | 0 | 1 | 1 | 0 | 1 | ... | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 |
| 1 | 0 | 56.95 | 1889.50 | 0 | 1 | 1 | 0 | 1 | 0 | 0 | ... | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 1 |
| 2 | 0 | 53.85 | 108.15 | 0 | 1 | 1 | 0 | 1 | 0 | 1 | ... | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 |
| 3 | 0 | 42.30 | 1840.75 | 0 | 1 | 1 | 0 | 1 | 0 | 0 | ... | 0 | 0 | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 0 |
| 4 | 0 | 70.70 | 151.65 | 1 | 0 | 1 | 0 | 1 | 0 | 1 | ... | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 7038 | 0 | 84.80 | 1990.50 | 0 | 1 | 0 | 1 | 0 | 1 | 0 | ... | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 1 |
| 7039 | 0 | 103.20 | 7362.90 | 1 | 0 | 0 | 1 | 0 | 1 | 0 | ... | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 0 |
| 7040 | 0 | 29.60 | 346.45 | 1 | 0 | 0 | 1 | 0 | 1 | 1 | ... | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 |
| 7041 | 1 | 74.40 | 306.60 | 0 | 1 | 0 | 1 | 1 | 0 | 1 | ... | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 |
| 7042 | 0 | 105.65 | 6844.50 | 0 | 1 | 1 | 0 | 1 | 0 | 0 | ... | 1 | 0 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 0 |
7032 rows × 50 columns
In [39]:
y
Out[39]:
0 0
1 0
2 1
3 0
4 1
..
7038 0
7039 0
7040 0
7041 1
7042 0
Name: Churn_Yes, Length: 7032, dtype: int64
In [40]:
from sklearn.model_selection import train_test_split
In [41]:
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size = 0.2,random_state = 10)
In [42]:
len(x_train)
Out[42]:
5625
In [43]:
len(x_test)
Out[43]:
1407
In [44]:
from sklearn.metrics import mean_squared_error,confusion_matrix,classification_report,accuracy_score
from sklearn.metrics import roc_curve,roc_auc_score,accuracy_score,mean_absolute_error
from sklearn.metrics import r2_score,mean_squared_error
In [45]:
from sklearn.linear_model import LinearRegression
model = LinearRegression()
In [46]:
model.fit(x_train,y_train)
Out[46]:
LinearRegression()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
LinearRegression()
In [47]:
model.score(x_test,y_test) # Accuracy
Out[47]:
0.3084976908935517
In [48]:
y_predicted = model.predict(x_test)
from sklearn.metrics import confusion_matrix
In [49]:
model.predict(x_test)
Out[49]:
array([-0.07128335, 0.02163858, 0.14581771, ..., -0.0334702 ,
0.19590921, 0.33938377])
In [50]:
y_test
Out[50]:
5401 0
1681 0
2076 0
3359 0
6629 0
..
1986 0
5608 0
6400 0
5560 0
3853 0
Name: Churn_Yes, Length: 1407, dtype: int64
In [51]:
from sklearn.linear_model import LogisticRegression
model = LogisticRegression()
In [52]:
model.fit(x_train,y_train)
Out[52]:
LogisticRegression()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
LogisticRegression()
In [53]:
model.score(x_test,y_test)
Out[53]:
0.8130774697938877
In [54]:
y_predicted = model.predict(x_test)
from sklearn.metrics import confusion_matrix
In [55]:
CM = confusion_matrix(y_test,y_predicted)
CM
Out[55]:
array([[944, 102],
[161, 200]], dtype=int64)
In [56]:
plt.figure(figsize = (5,3))
sns.heatmap(CM, annot = True)
plt.xlabel("Predicted")
plt.ylabel("Truth")
Out[56]:
Text(33.22222222222222, 0.5, 'Truth')
In [57]:
from sklearn import tree
In [58]:
model = tree.DecisionTreeClassifier()
In [59]:
model.fit(x_train,y_train)
Out[59]:
DecisionTreeClassifier()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
DecisionTreeClassifier()
In [60]:
model.score(x_test,y_test)
Out[60]:
0.7320540156361052
In [61]:
y_predicted = model.predict(x_test)
from sklearn.metrics import confusion_matrix
In [62]:
CM = confusion_matrix(y_test,y_predicted)
CM
Out[62]:
array([[825, 221],
[156, 205]], dtype=int64)
In [63]:
print(classification_report(y_test,y_predicted))
precision recall f1-score support
0 0.84 0.79 0.81 1046
1 0.48 0.57 0.52 361
accuracy 0.73 1407
macro avg 0.66 0.68 0.67 1407
weighted avg 0.75 0.73 0.74 1407
In [64]:
plt.figure(figsize = (5,3))
sns.heatmap(CM, annot = True)
plt.xlabel("Predicted")
plt.ylabel("Truth")
Out[64]:
Text(33.22222222222222, 0.5, 'Truth')
In [65]:
from sklearn.svm import SVC
model = SVC()
In [66]:
model.fit(x_train,y_train)
Out[66]:
SVC()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
SVC()
In [67]:
model.score(x_test,y_test)
Out[67]:
0.7434257285003554
In [68]:
y_predicted = model.predict(x_test)
from sklearn.metrics import confusion_matrix
In [69]:
CM = confusion_matrix(y_test,y_predicted)
CM
Out[69]:
array([[1046, 0],
[ 361, 0]], dtype=int64)
In [70]:
print(classification_report(y_test,y_predicted))
precision recall f1-score support
0 0.74 1.00 0.85 1046
1 0.00 0.00 0.00 361
accuracy 0.74 1407
macro avg 0.37 0.50 0.43 1407
weighted avg 0.55 0.74 0.63 1407
In [71]:
plt.figure(figsize = (5,3))
sns.heatmap(CM, annot = True)
plt.xlabel("Predicted")
plt.ylabel("Truth")
Out[71]:
Text(33.22222222222222, 0.5, 'Truth')
In [72]:
# multiple algo to predict outcome
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier()
In [73]:
model.fit(x_train,y_train)
Out[73]:
RandomForestClassifier()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
RandomForestClassifier()
In [74]:
model.score(x_test,y_test)
Out[74]:
0.7931769722814499
In [75]:
y_predicted = model.predict(x_test)
from sklearn.metrics import confusion_matrix
In [76]:
CM = confusion_matrix(y_test,y_predicted)
CM
Out[76]:
array([[919, 127],
[164, 197]], dtype=int64)
In [77]:
print(classification_report(y_test,y_predicted))
precision recall f1-score support
0 0.85 0.88 0.86 1046
1 0.61 0.55 0.58 361
accuracy 0.79 1407
macro avg 0.73 0.71 0.72 1407
weighted avg 0.79 0.79 0.79 1407
In [78]:
plt.figure(figsize = (5,3))
sns.heatmap(CM, annot = True)
plt.xlabel("Predicted")
plt.ylabel("Truth")
Out[78]:
Text(33.22222222222222, 0.5, 'Truth')
In [79]:
from sklearn.naive_bayes import GaussianNB
model = GaussianNB()
In [80]:
model.fit(x_train,y_train)
Out[80]:
GaussianNB()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
GaussianNB()
In [81]:
model.score(x_test,y_test)
Out[81]:
0.689410092395167
In [82]:
y_predicted = model.predict(x_test)
from sklearn.metrics import confusion_matrix
In [83]:
CM = confusion_matrix(y_test,y_predicted)
CM
Out[83]:
array([[664, 382],
[ 55, 306]], dtype=int64)
In [84]:
print(classification_report(y_test,y_predicted))
precision recall f1-score support
0 0.92 0.63 0.75 1046
1 0.44 0.85 0.58 361
accuracy 0.69 1407
macro avg 0.68 0.74 0.67 1407
weighted avg 0.80 0.69 0.71 1407
In [85]:
plt.figure(figsize = (5,3))
sns.heatmap(CM, annot = True)
plt.xlabel("Predicted")
plt.ylabel("Truth")
Out[85]:
Text(33.22222222222222, 0.5, 'Truth')
In [86]:
from sklearn.naive_bayes import MultinomialNB
model = MultinomialNB()
In [87]:
model.fit(x_train,y_train)
Out[87]:
MultinomialNB()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
MultinomialNB()
In [88]:
model.score(x_test,y_test)
Out[88]:
0.658137882018479
In [89]:
y_predicted = model.predict(x_test)
from sklearn.metrics import confusion_matrix
In [90]:
CM = confusion_matrix(y_test,y_predicted)
CM
Out[90]:
array([[637, 409],
[ 72, 289]], dtype=int64)
In [91]:
print(classification_report(y_test,y_predicted))
precision recall f1-score support
0 0.90 0.61 0.73 1046
1 0.41 0.80 0.55 361
accuracy 0.66 1407
macro avg 0.66 0.70 0.64 1407
weighted avg 0.77 0.66 0.68 1407
In [92]:
plt.figure(figsize = (5,3))
sns.heatmap(CM, annot = True)
plt.xlabel("Predicted")
plt.ylabel("Truth")
Out[92]:
Text(33.22222222222222, 0.5, 'Truth')
In [93]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
In [94]:
scaler.fit(x_test,y_test)
Out[94]:
StandardScaler()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
StandardScaler()
In [95]:
scaler.fit_transform(x)
Out[95]:
array([[-0.44032709, -1.16169394, -0.99419409, ..., -0.5253508 ,
1.40476387, -0.54360352],
[-0.44032709, -0.26087792, -0.17373982, ..., -0.5253508 ,
-0.71186341, 1.83957601],
[-0.44032709, -0.36392329, -0.95964911, ..., -0.5253508 ,
-0.71186341, 1.83957601],
...,
[-0.44032709, -1.17000405, -0.85451414, ..., -0.5253508 ,
1.40476387, -0.54360352],
[ 2.27103902, 0.31916782, -0.87209546, ..., -0.5253508 ,
-0.71186341, 1.83957601],
[-0.44032709, 1.35793167, 2.01234407, ..., -0.5253508 ,
-0.71186341, -0.54360352]])
In [96]:
from sklearn.preprocessing import MinMaxScaler
model = MinMaxScaler()
In [97]:
model.fit(x_train,y_train)
Out[97]:
MinMaxScaler()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
MinMaxScaler()
In [98]:
model.fit_transform(x)
Out[98]:
array([[0. , 0.11542289, 0.0012751 , ..., 0. , 1. ,
0. ],
[0. , 0.38507463, 0.21586661, ..., 0. , 0. ,
1. ],
[0. , 0.35422886, 0.01031041, ..., 0. , 0. ,
1. ],
...,
[0. , 0.11293532, 0.03780868, ..., 0. , 1. ,
0. ],
[1. , 0.55870647, 0.03321025, ..., 0. , 0. ,
1. ],
[0. , 0.86965174, 0.78764136, ..., 0. , 0. ,
0. ]])
In [99]:
from sklearn.decomposition import PCA
model = PCA()
In [100]:
model.fit(x_train,y_train)
Out[100]:
PCA()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
PCA()
In [101]:
model.fit_transform(x)
Out[101]:
array([[-2.25366835e+03, -1.54045960e+01, 2.41138473e+00, ...,
-1.51660698e-13, -5.75917529e-14, 8.49111474e-14],
[-3.93853721e+02, -4.44308307e+00, 1.11164960e+00, ...,
-1.39011273e-13, -5.43819132e-13, -2.50122100e-13],
[-2.17516400e+03, 7.87670140e+00, 1.15303139e+00, ...,
4.61012805e-13, -3.30919668e-13, -3.11723294e-13],
...,
[-1.93708238e+03, -1.84016387e+01, 2.46992124e+00, ...,
-8.50874545e-14, 2.89204519e-14, -1.14256834e-14],
[-1.97654365e+03, 2.67272984e+01, 2.46058887e-01, ...,
-9.49510228e-14, 9.17079680e-14, -3.79921907e-14],
[ 4.56138245e+03, 1.41903237e+00, -4.40312995e-01, ...,
-1.24732051e-13, 9.92906304e-14, -1.56826950e-14]])
In [102]:
from sklearn.linear_model import LogisticRegression
lr=LogisticRegression()
model=lr.fit(x_train,y_train)
pred=model.predict(x_test)
pred
Out[102]:
array([0, 0, 0, ..., 0, 0, 0], dtype=int64)
In [103]:
from sklearn.metrics import mean_squared_error,confusion_matrix,classification_report,accuracy_score
from sklearn.metrics import roc_curve,roc_auc_score,accuracy_score,mean_absolute_error
from sklearn.metrics import r2_score,mean_squared_error
In [104]:
print('classification_report')
print(classification_report(y_test,pred))
classification_report
precision recall f1-score support
0 0.85 0.90 0.88 1046
1 0.66 0.55 0.60 361
accuracy 0.81 1407
macro avg 0.76 0.73 0.74 1407
weighted avg 0.81 0.81 0.81 1407
In [105]:
print('accuracy_score')
print(accuracy_score(y_test,pred))
accuracy_score 0.8130774697938877
In [106]:
print('confusion_matrix')
print(confusion_matrix(y_test,pred))
confusion_matrix [[944 102] [161 200]]
In [107]:
print('mean_squared_error')
print(mean_squared_error(y_test,pred))
mean_squared_error 0.1869225302061123
In [108]:
print('roc_curve')
print(roc_curve(y_test,pred))
roc_curve (array([0. , 0.09751434, 1. ]), array([0. , 0.55401662, 1. ]), array([2, 1, 0], dtype=int64))
In [109]:
print('roc_auc_score')
print(roc_auc_score(y_test,pred))
roc_auc_score 0.7282511400772234
In [110]:
from sklearn import metrics
XYZ = lr.predict_proba(x_test)[::,1]
fpr, tpr, _ = metrics.roc_curve(y_test, XYZ)
#create ROC curve
plt.plot(fpr,tpr)
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()
In [111]:
print('accuracy_score')
print(accuracy_score(y_test,pred))
accuracy_score 0.8130774697938877
In [112]:
print('mean_absolute_error')
print(mean_absolute_error(y_test,pred))
mean_absolute_error 0.1869225302061123
In [113]:
print('r2_score')
print(r2_score(y_test,pred))
r2_score 0.02003410962749541
In [114]:
print('mean_squared_error')
print(mean_squared_error(y_test,pred))
mean_squared_error 0.1869225302061123
In [115]:
from sklearn.ensemble import GradientBoostingClassifier
model = GradientBoostingClassifier()
In [116]:
model.fit(x_test,y_test)
Out[116]:
GradientBoostingClassifier()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
GradientBoostingClassifier()
In [117]:
model.score(x_test,y_test)
Out[117]:
0.8869936034115139
In [118]:
y_predicted = model.predict(x_test)
from sklearn.metrics import confusion_matrix
In [119]:
CM = confusion_matrix(y_test,y_predicted)
CM
Out[119]:
array([[1004, 42],
[ 117, 244]], dtype=int64)
In [120]:
print(classification_report(y_test,y_predicted))
precision recall f1-score support
0 0.90 0.96 0.93 1046
1 0.85 0.68 0.75 361
accuracy 0.89 1407
macro avg 0.87 0.82 0.84 1407
weighted avg 0.88 0.89 0.88 1407
In [121]:
plt.figure(figsize = (5,3))
sns.heatmap(CM, annot = True)
plt.xlabel("Predicted")
plt.ylabel("Truth")
Out[121]:
Text(33.22222222222222, 0.5, 'Truth')
In [122]:
import tensorflow
from tensorflow import keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
In [123]:
model = Sequential()
model.add(Dense(3,activation = 'sigmoid',input_dim =50))
model.add(Dense(1,activation = 'sigmoid'))
In [124]:
model.summary()
Model: "sequential"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓ ┃ Layer (type) ┃ Output Shape ┃ Param # ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩ │ dense (Dense) │ (None, 3) │ 153 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ dense_1 (Dense) │ (None, 1) │ 4 │ └──────────────────────────────────────┴─────────────────────────────┴─────────────────┘
Total params: 157 (628.00 B)
Trainable params: 157 (628.00 B)
Non-trainable params: 0 (0.00 B)
(503)+3 = 153 __ Nural Network__ (31)+1 =4
In [125]:
model.compile(loss = 'binary_crossentropy',optimizer = 'Adam')
In [126]:
history = model.fit(x_train,y_train, epochs = 10,validation_split = 0.2)
Epoch 1/10 141/141 ━━━━━━━━━━━━━━━━━━━━ 3s 5ms/step - loss: 0.5621 - val_loss: 0.5353 Epoch 2/10 141/141 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 0.5435 - val_loss: 0.5085 Epoch 3/10 141/141 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 0.5249 - val_loss: 0.4995 Epoch 4/10 141/141 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 0.5088 - val_loss: 0.4964 Epoch 5/10 141/141 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 0.5063 - val_loss: 0.4822 Epoch 6/10 141/141 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 0.4996 - val_loss: 0.4748 Epoch 7/10 141/141 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 0.4954 - val_loss: 0.4680 Epoch 8/10 141/141 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 0.4954 - val_loss: 0.4767 Epoch 9/10 141/141 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 0.4844 - val_loss: 0.4740 Epoch 10/10 141/141 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step - loss: 0.4943 - val_loss: 0.4747
In [127]:
history.history
Out[127]:
{'loss': [0.557719886302948,
0.5357632040977478,
0.5170425772666931,
0.5098909139633179,
0.5036367177963257,
0.4997670650482178,
0.49613064527511597,
0.49850934743881226,
0.4926097095012665,
0.49803227186203003],
'val_loss': [0.5352861881256104,
0.5085173845291138,
0.4994664192199707,
0.4964390993118286,
0.48221153020858765,
0.474754273891449,
0.46796777844429016,
0.47672298550605774,
0.47400644421577454,
0.4747053384780884]}
In [128]:
model.layers[0].get_weights()
Out[128]:
[array([[-0.5463179 , 0.05650653, 0.3650551 ],
[ 0.03742829, -0.14135025, 0.13952711],
[-0.06450991, 0.06037767, -0.00681495],
[-0.17995971, -0.28102002, -0.1785467 ],
[-0.00374536, -0.19732894, -0.4361244 ],
[-0.04328292, -0.14842427, 0.11460909],
[ 0.26719922, 0.03449143, 0.01766997],
[ 0.3253568 , 0.12997107, 0.17704494],
[ 0.01427152, -0.04898315, -0.2847259 ],
[ 0.10231312, -0.02309847, -0.4784282 ],
[-0.22372206, -0.28719616, -0.2697603 ],
[-0.09915258, -0.08608794, 0.08483568],
[-0.23850799, 0.03314799, -0.17219527],
[-0.04669595, 0.05355498, -0.07766202],
[ 0.33629373, -0.02160749, 0.06255865],
[ 0.26900995, -0.32996234, -0.09059858],
[ 0.03602493, -0.20280282, -0.45306292],
[-0.08447246, 0.21260592, -0.14637433],
[ 0.1530685 , 0.08943116, -0.3217084 ],
[-0.02512531, -0.19206329, 0.04147688],
[ 0.43579125, -0.17336755, -0.23017651],
[-0.42269108, -0.07303046, 0.74278957],
[-0.14707884, -0.04528806, -0.5965806 ],
[-0.0638312 , 0.18509586, -0.14188132],
[ 0.0486447 , -0.06334215, -0.54371154],
[ 0.14677532, 0.3282468 , -0.4293225 ],
[-0.27745435, -0.3757026 , 0.15246469],
[-0.25772613, 0.32732505, -0.59927493],
[ 0.1539736 , 0.3636654 , -0.3323758 ],
[-0.05427227, 0.19733638, 0.30169678],
[ 0.02738924, -0.28589797, -0.20484433],
[-0.48669294, -0.32840505, -0.05018048],
[ 0.12742779, 0.1878976 , 0.31367347],
[ 0.34434706, 0.12863828, -0.1261132 ],
[-0.09824098, -0.177224 , -0.629373 ],
[ 0.1405677 , -0.24785283, -0.32302347],
[ 0.31714106, 0.19785787, -0.32872766],
[-0.40762943, -0.02202698, -0.01933847],
[ 0.04765834, 0.15024097, -0.30880463],
[-0.05138935, 0.02901016, -0.5496591 ],
[ 0.02611189, 0.04118262, 0.0641803 ],
[ 0.26618567, 0.13252485, -0.17799418],
[ 0.43030825, 0.37173033, -0.8854663 ],
[ 0.1212143 , 0.14268833, -0.8258031 ],
[ 0.25864804, 0.4541043 , -0.8002766 ],
[-0.38748187, -0.08299001, -0.0490102 ],
[ 0.02894197, 0.20624974, -0.4041168 ],
[ 0.37240303, 0.1631575 , -0.11515518],
[-0.48053962, -0.26066142, 0.41042686],
[ 0.44895425, 0.01954965, -0.67026377]], dtype=float32),
array([ 0.07838272, -0.00113499, -0.23922464], dtype=float32)]
In [129]:
model.layers[1].get_weights()
Out[129]:
[array([[-0.35099477],
[-1.2909899 ],
[ 1.4429581 ]], dtype=float32),
array([-0.40447542], dtype=float32)]
In [130]:
model.predict(x_test)
44/44 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
Out[130]:
array([[0.15514916],
[0.15505856],
[0.23293753],
...,
[0.15505846],
[0.15505846],
[0.1550587 ]], dtype=float32)
To convert in to "0" , "1" we have to give a threshhold value
In [131]:
y_log = model.predict(x_test)
44/44 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step
In [132]:
np.where(y_log>0.5,1,0)
Out[132]:
array([[0],
[0],
[0],
...,
[0],
[0],
[0]])
In [133]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test,y_predicted)
Out[133]:
0.8869936034115139
In [134]:
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
Out[134]:
[<matplotlib.lines.Line2D at 0x250edc37910>]
In [135]:
print(classification_report(y_test,y_predicted))
precision recall f1-score support
0 0.90 0.96 0.93 1046
1 0.85 0.68 0.75 361
accuracy 0.89 1407
macro avg 0.87 0.82 0.84 1407
weighted avg 0.88 0.89 0.88 1407
In [ ]: